Controls Analysis after Batch correction
1 Libraries & custom functions
knitr::opts_chunk$set(
cache = FALSE,
concordance = TRUE,
prompt = TRUE, # fig.width=5, fig.height=5,
out.width = "100%",
echo = TRUE,
warning = FALSE,
message = FALSE,
error = TRUE,
tidy = FALSE,
comment = "")> # from Bioconductor (install by running: BiocManager::install("x") | x = name of the package)
> # BiocManager::install(c("DESeq2", "phyloseq", "microbiome"))
>
> #github functions
> # devtools::install_github("bryandmartin/corncob")
> # remotes::install_github("g-antonello/gautils")
> # remotes::install_github("mikemc/speedyseq")#
> # devtools::install_github("gaospecial/ggvenn")
>
> library(gautils)
> library(magrittr)
> library(ggpubr)
> library(kableExtra)
>
> # Load control samples data
> # not working yet, need to transfer from local to server
>
> asvtable <- read_tsv("/shared/statgen/microbiome/CHRISMB/controls/controls_asv_table.tsv") %>%
+ column_to_rownames("ASV")
> taxtab <- read_tsv("/shared/statgen/microbiome/CHRISMB/controls/controls_tax_table.tsv") %>%
+ set_rownames(as.character(.$ASV))
> metadata <- read_tsv("/shared/statgen/microbiome/CHRISMB/controls/controls_meta_table.tsv")
>
> refseq <- Biostrings::readDNAStringSet(filepath = "/shared/statgen/microbiome/CHRISMB/controls/controls_refseq.phy")
>
> # check identity of rownames and colnames. for the ASVs, the reduce/Reduce strategy did not work
> samples_order_identical <- all(purrr::reduce(list(colnames(asvtable), metadata$Sample_ID), identical))
> ASVs_order_identical <- all(sapply(1:nrow(expand.grid(c(1,2,3), c(1,2,3))), function(i) identical(list(rownames(asvtable), rownames(taxtab), names(refseq))[[expand.grid(c(1,2,3), c(1,2,3))[i,1]]],list(rownames(asvtable), rownames(taxtab), names(refseq))[[expand.grid(c(1,2,3), c(1,2,3))[i,2]]])) )
>
> if(samples_order_identical & ASVs_order_identical){
+ phy_only_ctrls <- phyloseq(otu_table(asvtable, taxa_are_rows = T), tax_table(as.matrix(taxtab)), sample_data(metadata %>% set_rownames(NULL)), refseq(refseq))
+ } else{
+ stop("Rownames or colnames of phyloseq component tables do not match")
+ }2 Beta diversity
> # all
> theme_set(theme_light())
> phy_only_ctrls_nonZero <- subset_samples(phy_only_ctrls, counts_per_sample > 0)
> phy_only_ctrls_nonZero <- subset_taxa(phy_only_ctrls_nonZero, taxa_sums(phy_only_ctrls_nonZero) > 0)
>
> ord <- ordinate(microbiome::transform(phy_only_ctrls_nonZero, "compositional"), method = "PCoA", distance = "bray")
>
> basic_plot <- plot_ordination(physeq = phy_only_ctrls_nonZero,
+ ordination = ord,
+ color = "ctrl_type"
+ )
>
> pcoa_ctrls_final_all <- basic_plot + ggforce::geom_mark_ellipse(aes(group= ctrl_type, colour = ctrl_type, label = ctrl_type)) + ggtitle("All controls")
>
> # only zymo
> phy_only_ctrls_nonZero <- subset_samples(phy_only_ctrls, counts_per_sample > 0 & ctrl_type == "zymo_pos")
> phy_only_ctrls_nonZero <- subset_taxa(phy_only_ctrls_nonZero, taxa_sums(phy_only_ctrls_nonZero) > 0)
>
> ord <- ordinate(microbiome::transform(phy_only_ctrls_nonZero, "compositional"), method = "PCoA", distance = "bray")
>
> basic_plot <- plot_ordination(physeq = phy_only_ctrls_nonZero,
+ ordination = ord,
+ color = "batch"
+ )
>
> pcoa_ctrls_final_zymo <- basic_plot + ggforce::geom_mark_ellipse(aes(group= batch, colour = batch, label = batch)) + ggtitle("Zymo")
>
> # only extr ctrl
>
> phy_only_ctrls_nonZero <- subset_samples(phy_only_ctrls, counts_per_sample > 0 & ctrl_type == "DNA_extraction_ctrl")
> phy_only_ctrls_nonZero <- subset_taxa(phy_only_ctrls_nonZero, taxa_sums(phy_only_ctrls_nonZero) > 0)
>
> ord <- ordinate(microbiome::transform(phy_only_ctrls_nonZero, "compositional"), method = "PCoA", distance = "bray")
>
> basic_plot <- plot_ordination(physeq = phy_only_ctrls_nonZero,
+ ordination = ord,
+ color = "batch"
+ )
>
> pcoa_ctrls_final_DNA_extr <- basic_plot + ggforce::geom_mark_ellipse(aes(group= batch, colour = batch, label = batch)) + ggtitle("DNA Extraction controls")
>
> # only water ctrl
>
>
> phy_only_ctrls_nonZero <- subset_samples(phy_only_ctrls, counts_per_sample > 0 & ctrl_type == "water_ctrl")
> phy_only_ctrls_nonZero <- subset_taxa(phy_only_ctrls_nonZero, taxa_sums(phy_only_ctrls_nonZero) > 0)
>
> ord <- ordinate(microbiome::transform(phy_only_ctrls_nonZero, "compositional"), method = "PCoA", distance = "bray")
>
> basic_plot <- plot_ordination(physeq = phy_only_ctrls_nonZero,
+ ordination = ord,
+ color = "batch"
+ )
>
> pcoa_ctrls_final_water <- basic_plot + ggforce::geom_mark_ellipse(aes(group= batch, colour = batch, label = batch)) + ggtitle("Water controls")
>
>
> ggsave(filename = "pcoa_bray_controls.png", height = 8, width = 10, dpi = 300,
+ plot = ggpubr::ggarrange(pcoa_ctrls_final_all, ggpubr::ggarrange(pcoa_ctrls_final_zymo,pcoa_ctrls_final_DNA_extr,pcoa_ctrls_final_water, nrow = 1, ncol = 3, common.legend = TRUE, legend = "top"), nrow = 2, ncol = 1, heights = c(0.8,1))
+ )
>
> ggpubr::ggarrange(pcoa_ctrls_final_all, ggpubr::ggarrange(pcoa_ctrls_final_zymo,pcoa_ctrls_final_DNA_extr,pcoa_ctrls_final_water, nrow = 1, ncol = 3, common.legend = TRUE, legend = "top"), nrow = 2, ncol = 1, heights = c(0.8,1))3 Zymo controls
3.1 Heatmap by ASV
3.1.1 All batches
> phy_ctrls_zymo <- subset_samples(phy_only_ctrls, ctrl_type == "zymo_pos")
> phy_ctrls_zymo <- subset_taxa(phy_ctrls_zymo, taxa_sums(phy_ctrls_zymo)/(sum(taxa_sums(phy_ctrls_zymo)))*100 > 0.1)
>
>
>
> plot_all_batches <- phy_ctrls_zymo %>%
+ abundances("compositional") %>%
+ pheatmap::pheatmap(scale = "none", annotation_col = select(meta(phy_ctrls_zymo), batch))3.1.2 CHRISMB-only batches
> phy_ctrls_zymo_chrismb <- filter_sample_data(phy_ctrls_zymo, batch %in% as.character(c(1:5, 7)))
> phy_ctrls_zymo_chrismb <- subset_taxa(phy_ctrls_zymo_chrismb, taxa_sums(phy_ctrls_zymo_chrismb)>0)
>
> plot_chrismb_batches <- phy_ctrls_zymo_chrismb %>%
+ abundances("compositional") %>%
+ pheatmap::pheatmap(scale = "none", annotation_col = select(meta(phy_ctrls_zymo), batch))Batch 4 has enough clustering to deserve some attention, let’s keep it in mind
3.1.3 PCoA
> plot_ordination(phy_ctrls_zymo_chrismb,
+ ordination,type = "samples",
+ color = "batch")+
+ #scale_color_brewer(type = "div")+
+ scale_color_viridis_d()+
+ stat_ellipse(aes(group = batch))+
+ geom_point(size = 3)##{-}
3.2 barplot
> phy_ctrls_zymo_relabund <- microbiome::transform(phy_ctrls_zymo, "compositional")
> sample_names(phy_ctrls_zymo_relabund) <- paste0("plate",phy_ctrls_zymo_relabund@sam_data$plate, "_batch", phy_ctrls_zymo_relabund@sam_data$batch)
>
> plotly::ggplotly(plot_bar(phy_ctrls_zymo_relabund, fill = "ASV")+ scale_fill_viridis_d()+scale_color_viridis_d() + ggtitle("Filtered for ASVs > 0.1%"))3.3 how much variability does batch explain?
Plate explains 60% of the variability, sequencing 30%. Nicely, this is expected, because sequencing should be more accurate than the previous steps necessary to prepare the library.
> zymo_dist_bray <- phyloseq::distance(phy_ctrls_zymo_relabund, method = "bray")
> library(vegan)
> betadisper(zymo_dist_bray, group = phy_ctrls_zymo_relabund@sam_data$batch) %>% permutest()
Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999
Response: Distances
Df Sum Sq Mean Sq F N.Perm Pr(>F)
Groups 9 0.0016822 0.00018691 1.0264 999 0.42
Residuals 20 0.0036421 0.00018210
> zymo_permanova <- vegan::adonis2(formula = zymo_dist_bray ~ plate + batch, data = microbiome::meta(phy_ctrls_zymo_relabund))
>
> kableExtra::kbl(zymo_permanova, digits = 2) %>%
+ kableExtra::kable_styling()| Df | SumOfSqs | R2 | F | Pr(>F) | |
|---|---|---|---|---|---|
| plate | 4 | 0.03 | 0.61 | 30.20 | 0 |
| batch | 8 | 0.01 | 0.30 | 7.52 | 0 |
| Residual | 17 | 0.00 | 0.09 | NA | NA |
| Total | 29 | 0.04 | 1.00 | NA | NA |